clear
clear matrix
set matsize 1000
set more off


*this command opens the BSD dataset for the year 2005
use ".........\bsd2005.dta", clear

*this command only keeps active firms
keep if active==1

*the following generate and replace commands create the aggregare industry breakdown compatible with the input-output tables
gen nace2=substr(sic, 1, 2)
gen aggregate_ind=.
replace aggregate_ind=1 if nace2=="01"
replace aggregate_ind=1 if nace2=="02"
replace aggregate_ind=1 if nace2=="05"
replace aggregate_ind=2 if nace2=="10"
replace aggregate_ind=2 if nace2=="11"
replace aggregate_ind=2 if nace2=="13"
replace aggregate_ind=2 if nace2=="14"
replace aggregate_ind=3 if nace2=="15"
replace aggregate_ind=3 if nace2=="16"
replace aggregate_ind=4 if nace2=="17"
replace aggregate_ind=4 if nace2=="18"
replace aggregate_ind=4 if nace2=="19"
replace aggregate_ind=5 if nace2=="20"
replace aggregate_ind=6 if nace2=="21"
replace aggregate_ind=6 if nace2=="22"
replace aggregate_ind=7 if nace2=="23"
replace aggregate_ind=8 if nace2=="24"
replace aggregate_ind=9 if nace2=="25"
replace aggregate_ind=10 if nace2=="26"
replace aggregate_ind=11 if nace2=="27"
replace aggregate_ind=12 if nace2=="28"
replace aggregate_ind=13 if nace2=="29"
replace aggregate_ind=14 if nace2=="30"
replace aggregate_ind=15 if nace2=="31"
replace aggregate_ind=16 if nace2=="32"
replace aggregate_ind=17 if nace2=="33"
replace aggregate_ind=18 if nace2=="34"
replace aggregate_ind=19 if nace2=="35"
replace aggregate_ind=20 if nace2=="36"
replace aggregate_ind=20 if nace2=="37"
replace aggregate_ind=21 if nace2=="40"
replace aggregate_ind=21 if nace2=="41"
replace aggregate_ind=22 if nace2=="45"
replace aggregate_ind=23 if nace2=="50"
replace aggregate_ind=23 if nace2=="51"
replace aggregate_ind=23 if nace2=="52"
replace aggregate_ind=24 if nace2=="55"
replace aggregate_ind=25 if nace2=="60"
replace aggregate_ind=25 if nace2=="61"
replace aggregate_ind=25 if nace2=="62"
replace aggregate_ind=25 if nace2=="63"
replace aggregate_ind=26 if nace2=="64"
replace aggregate_ind=27 if nace2=="65"
replace aggregate_ind=27 if nace2=="66"
replace aggregate_ind=27 if nace2=="67"
replace aggregate_ind=28 if nace2=="70"
replace aggregate_ind=29 if nace2=="71"
replace aggregate_ind=30 if nace2=="72"
replace aggregate_ind=31 if nace2=="73"
replace aggregate_ind=32 if nace2=="74"
replace aggregate_ind=33 if nace2=="75"
replace aggregate_ind=34 if nace2=="80"
replace aggregate_ind=35 if nace2=="85"
replace aggregate_ind=35 if nace2=="90"
replace aggregate_ind=35 if nace2=="91"
replace aggregate_ind=35 if nace2=="92"
replace aggregate_ind=35 if nace2=="93"
replace aggregate_ind=35 if nace2=="95"
replace aggregate_ind=35 if nace2=="98"
replace aggregate_ind=35 if nace2=="99"

*we drop cases that we cannot assign
drop if aggregate_ind==.

*we drop cases with missing turnover or turnover less than 50 thousand pounds 
drop if turnover==.
drop if turnover<=50

*we now trim the data
egen p99_e=pctile(turnover), p(97.5) 
drop if turnover>=p99_e
drop p99_e



*finally we compute what we need
gen ln_turnover=log(turnover)
egen tot_turn_ind=sum(turnover), by(aggregate_ind)
gen mark_share_2=(turnover/tot_turn_ind)^2
egen double SD_log_r=sd(ln_turnover), by(aggregate_ind)
egen double Mean_log_turnover=mean(ln_turnover), by(aggregate_ind)
gen double dev_mean_4=(ln_turnover-Mean_log_turnover)^4
egen double Mean_dev_mean_4=mean(dev_mean_4), by(aggregate_ind)
gen double Sigma_4=SD_log_r^4

gen n_firms=1
collapse (sum) n_firms mark_share_2 (mean) SD_log_r Sigma_4 Mean_dev_mean_4, by(aggregate_ind)

save "....\bs_data_all_fin_robust_revenue.dta", replace
export excel using "....\bs_data_all_fin_robust_revenue.xls", replace
